In [ ]:
import pandas as pd
import numpy as np
In [ ]:
df = pd.read_csv('/content/banana_quality.csv')
In [ ]:
df
Out[ ]:
Size Weight Sweetness Softness HarvestTime Ripeness Acidity Quality
0 -1.924968 0.468078 3.077832 -1.472177 0.294799 2.435570 0.271290 Good
1 -2.409751 0.486870 0.346921 -2.495099 -0.892213 2.067549 0.307325 Good
2 -0.357607 1.483176 1.568452 -2.645145 -0.647267 3.090643 1.427322 Good
3 -0.868524 1.566201 1.889605 -1.273761 -1.006278 1.873001 0.477862 Good
4 0.651825 1.319199 -0.022459 -1.209709 -1.430692 1.078345 2.812442 Good
... ... ... ... ... ... ... ... ...
7995 -6.414403 0.723565 1.134953 2.952763 0.297928 -0.156946 2.398091 Bad
7996 0.851143 -2.217875 -2.812175 0.489249 -1.323410 -2.316883 2.113136 Bad
7997 1.422722 -1.907665 -2.532364 0.964976 -0.562375 -1.834765 0.697361 Bad
7998 -2.131904 -2.742600 -1.008029 2.126946 -0.802632 -3.580266 0.423569 Bad
7999 -2.660879 -2.044666 0.159026 1.499706 -1.581856 -1.605859 1.435644 Bad

8000 rows × 8 columns

Distributions

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

2-d distributions

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Time series

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Values

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
numerical = []
categorical = []
for column in df.columns:
  if df[column].dtype!='O':
    numerical.append(column)
  else:
    categorical.append(column)

print(numerical)
print(categorical)
['Size', 'Weight', 'Sweetness', 'Softness', 'HarvestTime', 'Ripeness', 'Acidity']
['Quality']
In [ ]:
df.isnull().sum()
Out[ ]:
Size           0
Weight         0
Sweetness      0
Softness       0
HarvestTime    0
Ripeness       0
Acidity        0
Quality        0
dtype: int64
In [ ]:
unique_values = {}
for feature in categorical:
    unique_values[feature] = df[feature].unique()

df_unique_values = pd.DataFrame.from_dict([unique_values]).transpose().reset_index()

df_unique_values.columns = ['Categorical Feature','Unique_values']
df_unique_values['Number of Unique Values'] = df_unique_values['Unique_values'].apply(lambda x: len(x))

df_unique_values
Out[ ]:
Categorical Feature Unique_values Number of Unique Values
0 Quality [Good, Bad] 2
In [ ]:
df['Quality'] = df['Quality'].replace({'Good': 0, 'Bad': 1})
In [ ]:
df
Out[ ]:
Size Weight Sweetness Softness HarvestTime Ripeness Acidity Quality
0 -1.924968 0.468078 3.077832 -1.472177 0.294799 2.435570 0.271290 0
1 -2.409751 0.486870 0.346921 -2.495099 -0.892213 2.067549 0.307325 0
2 -0.357607 1.483176 1.568452 -2.645145 -0.647267 3.090643 1.427322 0
3 -0.868524 1.566201 1.889605 -1.273761 -1.006278 1.873001 0.477862 0
4 0.651825 1.319199 -0.022459 -1.209709 -1.430692 1.078345 2.812442 0
... ... ... ... ... ... ... ... ...
7995 -6.414403 0.723565 1.134953 2.952763 0.297928 -0.156946 2.398091 1
7996 0.851143 -2.217875 -2.812175 0.489249 -1.323410 -2.316883 2.113136 1
7997 1.422722 -1.907665 -2.532364 0.964976 -0.562375 -1.834765 0.697361 1
7998 -2.131904 -2.742600 -1.008029 2.126946 -0.802632 -3.580266 0.423569 1
7999 -2.660879 -2.044666 0.159026 1.499706 -1.581856 -1.605859 1.435644 1

8000 rows × 8 columns

In [ ]:
df[numerical[:]].hist(figsize=(10,10),bins=100)
Out[ ]:
array([[<Axes: title={'center': 'Size'}>,
        <Axes: title={'center': 'Weight'}>,
        <Axes: title={'center': 'Sweetness'}>],
       [<Axes: title={'center': 'Softness'}>,
        <Axes: title={'center': 'HarvestTime'}>,
        <Axes: title={'center': 'Ripeness'}>],
       [<Axes: title={'center': 'Acidity'}>, <Axes: >, <Axes: >]],
      dtype=object)
No description has been provided for this image
In [ ]:
import seaborn as sns

# Combine the features and target variable into one DataFrame
df_combined = X_train.copy()
df_combined['Quality'] = y_train

# Plot scatter plots for every pair of features against each other
sns.pairplot(df_combined, hue='Quality', diag_kind='kde')
plt.show()
No description has been provided for this image
In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot distribution plots for each feature
for feature in X_train.columns:
    sns.displot(X_train[feature], kde=True)
    plt.title(f'Distribution of {feature}')
    plt.xlabel(feature)
    plt.ylabel('Density')
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
df_scaled = df.copy()
for column in numerical:
    df_scaled[column] = (df[column] - df[column].min()) / (df[column].max() - df[column].min())

df_scaled
Out[ ]:
Size Weight Sweetness Softness HarvestTime Ripeness Acidity Quality
0 0.405073 0.599374 0.911017 0.384425 0.604782 0.640332 0.518495 0
1 0.363736 0.600941 0.628708 0.302919 0.491862 0.607889 0.521215 0
2 0.538722 0.683992 0.754984 0.290963 0.515164 0.698081 0.605769 0
3 0.495156 0.690913 0.788183 0.400234 0.481011 0.590738 0.534090 0
4 0.624796 0.670323 0.590524 0.405338 0.440636 0.520684 0.710337 0
... ... ... ... ... ... ... ... ...
7995 0.022259 0.620671 0.710171 0.737001 0.605080 0.411786 0.679056 1
7996 0.641792 0.375475 0.302137 0.540710 0.450842 0.221374 0.657544 1
7997 0.690530 0.401334 0.331062 0.578615 0.523239 0.263876 0.550661 1
7998 0.387428 0.331734 0.488640 0.671200 0.500384 0.109999 0.529991 1
7999 0.342322 0.389914 0.609285 0.621222 0.426256 0.284055 0.606397 1

8000 rows × 8 columns

In [ ]:
# import the necessary libraries
import seaborn as sns
import matplotlib.pyplot as plt

# to ignore the warnings
from warnings import filterwarnings

for feature in numerical:
  if feature!='index':
      fig = plt.figure()
      plt.xlabel("feature")
      sns.distplot(df[feature])
<ipython-input-10-304298f86003>:12: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(df[feature])
<ipython-input-10-304298f86003>:12: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(df[feature])
<ipython-input-10-304298f86003>:12: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(df[feature])
<ipython-input-10-304298f86003>:12: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(df[feature])
<ipython-input-10-304298f86003>:12: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(df[feature])
<ipython-input-10-304298f86003>:12: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(df[feature])
<ipython-input-10-304298f86003>:12: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(df[feature])
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
# import the necessary libraries
import seaborn as sns
import matplotlib.pyplot as plt

# to ignore the warnings
from warnings import filterwarnings

for feature in numerical:
  if feature!='index':
      fig = plt.figure()
      sns.boxplot(df[feature])
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
import matplotlib.pyplot as plt
for column in numerical:
    Q1 = df[column].quantile(.25)
    Q3 = df[column].quantile(.75)
    IQR = Q3 - Q1
    UB = Q3 + 1.5 * IQR
    LB = Q1 - 1.5 * IQR
    df.loc[df[column] >= UB, column] = UB
    df.loc[df[column] <= LB, column] = LB
    plt.figure()
    df[[column]].boxplot()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:

Model Training and Testing¶

In [ ]:
import numpy

from sklearn.model_selection import train_test_split

X = df.drop(['Quality'] ,axis=1)
y = df['Quality']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [ ]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

# Define models
models = [
    {
        'name': 'Logistic Regression',
        'model': LogisticRegression(),
        'params': {}
    },
    {
        'name': 'KNN',
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 7, 9]
        }
    },
    {
        'name': 'Gaussian Naive Bayes',
        'model': GaussianNB(),
        'params': {}
    },
    {
        'name': 'Decision Tree',
        'model': DecisionTreeClassifier(),
        'params': {
            'max_depth': [None, 5, 10, 15, 20]
        }
    }
]

# Perform GridSearchCV for each model
for model_info in models:
    print(f"Performing GridSearchCV for {model_info['name']}...")
    grid_search = GridSearchCV(model_info['model'], model_info['params'], cv=5)
    grid_search.fit(X_train, y_train)
    print("Best parameters found:", grid_search.best_params_)
    print("Best score found:", grid_search.best_score_)
    print("\n")
Performing GridSearchCV for Logistic Regression...
Best parameters found: {}
Best score found: 0.8768750000000001


Performing GridSearchCV for KNN...
Best parameters found: {'n_neighbors': 7}
Best score found: 0.9806250000000001


Performing GridSearchCV for Gaussian Naive Bayes...
Best parameters found: {}
Best score found: 0.885


Performing GridSearchCV for Decision Tree...
Best parameters found: {'max_depth': 10}
Best score found: 0.93484375


In [ ]:
from sklearn.metrics import accuracy_score

# Create KNN model with n_neighbors=5
knn = KNeighborsClassifier(n_neighbors=5)

# Fit the model
knn.fit(X_train, y_train)

# Predict on the test data
y_pred = knn.predict(X_test)

# Calculate accuracy score
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy)
Accuracy Score: 0.980625
In [ ]:
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve

# Assuming y_Test and y_pred are your true labels and predicted labels respectively

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)
Confusion Matrix:
[[808  11]
 [ 11 770]]
In [ ]:

In [ ]:
sns.heatmap(cm,
            annot=True,
            fmt='g',
            xticklabels=['0','1'],
            yticklabels=['0','1'])
plt.ylabel('Prediction',fontsize=13)
plt.xlabel('Actual',fontsize=13)
plt.title('Confusion Matrix',fontsize=17)
plt.show()
No description has been provided for this image
In [ ]:
def plot_roc(y_Test,y_pred):
   fpr,tpr,thresholds = roc_curve(y_Test,y_pred)
   plt.plot(fpr,tpr)
   plt.xlabel("False positive rate")
   plt.ylabel("True positive rate")
   plt.show()

plot_roc(y_test,y_pred)
No description has been provided for this image
In [ ]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
# Define the model
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),  # Hidden layer with 64 units and ReLU activation
    Dense(256,activation='relu'),
    Dropout(0.2),  # Dropout layer with a dropout rate of 0.2
    Dense(1, activation='sigmoid')  # Output layer with 1 unit and sigmoid activation for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Loss:", loss)
print("Accuracy:", accuracy)


# Plot accuracy
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Testing Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Plot loss
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Testing Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Predict on test data
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)

# Create confusion matrix
cm = confusion_matrix(y_test, y_pred_classes)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()
Epoch 1/50
200/200 [==============================] - 2s 7ms/step - loss: 0.1440 - accuracy: 0.9498 - val_loss: 0.0869 - val_accuracy: 0.9725
Epoch 2/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0989 - accuracy: 0.9711 - val_loss: 0.0910 - val_accuracy: 0.9675
Epoch 3/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0888 - accuracy: 0.9727 - val_loss: 0.0823 - val_accuracy: 0.9806
Epoch 4/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0840 - accuracy: 0.9772 - val_loss: 0.0756 - val_accuracy: 0.9769
Epoch 5/50
200/200 [==============================] - 1s 7ms/step - loss: 0.0827 - accuracy: 0.9758 - val_loss: 0.0728 - val_accuracy: 0.9781
Epoch 6/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0801 - accuracy: 0.9777 - val_loss: 0.0773 - val_accuracy: 0.9781
Epoch 7/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0739 - accuracy: 0.9784 - val_loss: 0.0784 - val_accuracy: 0.9794
Epoch 8/50
200/200 [==============================] - 2s 8ms/step - loss: 0.0753 - accuracy: 0.9798 - val_loss: 0.0713 - val_accuracy: 0.9812
Epoch 9/50
200/200 [==============================] - 2s 8ms/step - loss: 0.0720 - accuracy: 0.9797 - val_loss: 0.0832 - val_accuracy: 0.9756
Epoch 10/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0688 - accuracy: 0.9797 - val_loss: 0.0680 - val_accuracy: 0.9831
Epoch 11/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0682 - accuracy: 0.9812 - val_loss: 0.0677 - val_accuracy: 0.9806
Epoch 12/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0655 - accuracy: 0.9820 - val_loss: 0.0669 - val_accuracy: 0.9825
Epoch 13/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0676 - accuracy: 0.9812 - val_loss: 0.0670 - val_accuracy: 0.9844
Epoch 14/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0628 - accuracy: 0.9803 - val_loss: 0.0694 - val_accuracy: 0.9819
Epoch 15/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0639 - accuracy: 0.9822 - val_loss: 0.0693 - val_accuracy: 0.9819
Epoch 16/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0596 - accuracy: 0.9834 - val_loss: 0.0718 - val_accuracy: 0.9794
Epoch 17/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0587 - accuracy: 0.9816 - val_loss: 0.0670 - val_accuracy: 0.9850
Epoch 18/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0565 - accuracy: 0.9830 - val_loss: 0.0646 - val_accuracy: 0.9837
Epoch 19/50
200/200 [==============================] - 2s 8ms/step - loss: 0.0537 - accuracy: 0.9844 - val_loss: 0.0789 - val_accuracy: 0.9812
Epoch 20/50
200/200 [==============================] - 1s 7ms/step - loss: 0.0564 - accuracy: 0.9830 - val_loss: 0.0642 - val_accuracy: 0.9844
Epoch 21/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0540 - accuracy: 0.9830 - val_loss: 0.0766 - val_accuracy: 0.9831
Epoch 22/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0562 - accuracy: 0.9841 - val_loss: 0.0649 - val_accuracy: 0.9837
Epoch 23/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0509 - accuracy: 0.9858 - val_loss: 0.0856 - val_accuracy: 0.9787
Epoch 24/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0512 - accuracy: 0.9866 - val_loss: 0.0693 - val_accuracy: 0.9831
Epoch 25/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0506 - accuracy: 0.9855 - val_loss: 0.0726 - val_accuracy: 0.9825
Epoch 26/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0467 - accuracy: 0.9862 - val_loss: 0.0773 - val_accuracy: 0.9800
Epoch 27/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0477 - accuracy: 0.9859 - val_loss: 0.0732 - val_accuracy: 0.9812
Epoch 28/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0459 - accuracy: 0.9858 - val_loss: 0.0784 - val_accuracy: 0.9831
Epoch 29/50
200/200 [==============================] - 1s 7ms/step - loss: 0.0440 - accuracy: 0.9861 - val_loss: 0.0836 - val_accuracy: 0.9837
Epoch 30/50
200/200 [==============================] - 2s 8ms/step - loss: 0.0470 - accuracy: 0.9861 - val_loss: 0.0944 - val_accuracy: 0.9800
Epoch 31/50
200/200 [==============================] - 1s 4ms/step - loss: 0.0438 - accuracy: 0.9878 - val_loss: 0.0749 - val_accuracy: 0.9837
Epoch 32/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0441 - accuracy: 0.9869 - val_loss: 0.0859 - val_accuracy: 0.9812
Epoch 33/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0436 - accuracy: 0.9867 - val_loss: 0.0807 - val_accuracy: 0.9844
Epoch 34/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0397 - accuracy: 0.9872 - val_loss: 0.0903 - val_accuracy: 0.9837
Epoch 35/50
200/200 [==============================] - 1s 4ms/step - loss: 0.0422 - accuracy: 0.9867 - val_loss: 0.0825 - val_accuracy: 0.9856
Epoch 36/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0386 - accuracy: 0.9884 - val_loss: 0.1002 - val_accuracy: 0.9844
Epoch 37/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0375 - accuracy: 0.9892 - val_loss: 0.0848 - val_accuracy: 0.9825
Epoch 38/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0369 - accuracy: 0.9878 - val_loss: 0.0875 - val_accuracy: 0.9831
Epoch 39/50
200/200 [==============================] - 2s 8ms/step - loss: 0.0387 - accuracy: 0.9873 - val_loss: 0.0934 - val_accuracy: 0.9837
Epoch 40/50
200/200 [==============================] - 2s 9ms/step - loss: 0.0372 - accuracy: 0.9886 - val_loss: 0.0922 - val_accuracy: 0.9837
Epoch 41/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0367 - accuracy: 0.9884 - val_loss: 0.0908 - val_accuracy: 0.9831
Epoch 42/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0339 - accuracy: 0.9894 - val_loss: 0.1027 - val_accuracy: 0.9837
Epoch 43/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0332 - accuracy: 0.9903 - val_loss: 0.1135 - val_accuracy: 0.9831
Epoch 44/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0322 - accuracy: 0.9895 - val_loss: 0.0974 - val_accuracy: 0.9844
Epoch 45/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0309 - accuracy: 0.9897 - val_loss: 0.0987 - val_accuracy: 0.9837
Epoch 46/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0335 - accuracy: 0.9887 - val_loss: 0.1045 - val_accuracy: 0.9831
Epoch 47/50
200/200 [==============================] - 1s 6ms/step - loss: 0.0310 - accuracy: 0.9898 - val_loss: 0.1131 - val_accuracy: 0.9812
Epoch 48/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0312 - accuracy: 0.9898 - val_loss: 0.0995 - val_accuracy: 0.9862
Epoch 49/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0334 - accuracy: 0.9894 - val_loss: 0.1090 - val_accuracy: 0.9837
Epoch 50/50
200/200 [==============================] - 1s 5ms/step - loss: 0.0326 - accuracy: 0.9894 - val_loss: 0.1106 - val_accuracy: 0.9831
50/50 [==============================] - 0s 3ms/step - loss: 0.1106 - accuracy: 0.9831
Loss: 0.11059460788965225
Accuracy: 0.9831249713897705
No description has been provided for this image
No description has been provided for this image
50/50 [==============================] - 0s 2ms/step
No description has been provided for this image
In [ ]:
##Save KNN Model
import pickle
with open('banana_quilty.pkl','wb') as f:
    pickle.dump(model,f)
In [ ]:

In [ ]:
from joblib import dump
dump(model, 'banana_quilty.joblib')
Out[ ]:
['banana_quilty.joblib']
In [ ]:
model.save('banana_model.h5')
/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.
  saving_api.save_model(
In [ ]:
from keras.models import load_model
model = load_model('banana_model.h5')
In [ ]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import joblib

# Load the dataset
data = df

# Split features and target variable
# X = data.drop('Quality', axis=1)  # Replace 'Quality' with the actual name of your target column
# y = data['Quality']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the MLPClassifier
model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate the model
accuracy = model.score(X_test_scaled, y_test)
print("Accuracy:", accuracy)

# Save the trained model using joblib
joblib.dump(model, 'banana_quilty.joblib')

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

# Predictions on the test set
y_pred = model.predict(X_test_scaled)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Plot Confusion Matrix
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred))
Accuracy: 0.98625
Confusion Matrix:
[[808  11]
 [ 11 770]]
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       819
           1       0.99      0.99      0.99       781

    accuracy                           0.99      1600
   macro avg       0.99      0.99      0.99      1600
weighted avg       0.99      0.99      0.99      1600

In [ ]: